Dissolved Oxygen Saturation Report

get data across all programs, filter for analyte
library('here')
library('dplyr')
source(here("R/getAllData.R"))

# read from all source files
# full_df <- getAllData() %>%
#   mutate(
#     source = program,
#     site = Monitoring.Location.ID,
#     datetime = Activity.Start.Date.Time,
#     analyte = DEP.Analyte.Name,
#     value = DEP.Result.Value.Number,
#     units = DEP.Result.Unit,
#     latitude = Org.Decimal.Latitude,
#     longitude = Org.Decimal.Longitude,
#     sample_depth = Activity.Depth,
#     .keep = "none"
# )

# read from cached file produced by index.qmd
full_df <- read.csv(here("data", "exports", "allData.csv"))

df <- filter(full_df, analyte == params$analyte)
create .csv of analyte data
# save df to csv
# reduce to only cols we need & save to csv
df %>%
  write.csv(here("data", "exports", "unified-wq-db-samples", paste0(params$analyte, ".csv")))

value Distribution Across All Datasets

display histogram of values
library(ggplot2)
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  scale_y_log10() +
  labs(title = "Histogram of Values", x = "Value", y = "Log10(Count)")

Note that values are often not directly comparable due to the use of different units.

Show usage of different units
library(ggplot2)

ggplot(df, aes(x = source, fill = units)) +
  geom_bar() +
  scale_y_log10() +
  labs(
    title = "Count of Observations by Program and Units",
    x = "Program Name",
    y = "Count (Log Scale)",
    fill = "Units Reported"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Show value distributions for each program
library(ggplot2)

ggplot(df, aes(x = units, y = value)) +
  geom_violin(width = 1.2, adjust = 10, fill = "skyblue", color = "black") +
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  scale_y_log10() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "unit", y = "Log10(Value)", title = "Violin + Boxplot")

Values Reported By Program:

Show value distributions for each program
library(ggplot2)

ggplot(df, aes(x = source, y = value)) +
  geom_violin(width = 1.2, adjust = 10, fill = "skyblue", color = "black") +
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  scale_y_log10() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Program", y = "Log10(Value)", title = "Violin + Boxplot")

Station Statistics:

create station statistics dataframe
source(here("R/seasonalMannKendall.R"))
library(lubridate)  # for mdy_hms()
library(pander)  # for display

# create table of samples for each station
samples_df <- df %>%
  # drop any with empty Monitoring.Location.ID
  filter(!is.na(site)) %>%
  # drop any with empty Activity.Start.Date.Time
  filter(!is.na(datetime)) %>%
  # parse the "YYYY-MM-DD" strings into POSIXct
  mutate(datetime = ymd(
          datetime,
          tz = "UTC")) %>%
  distinct()


# add statistics for each station
sample_stats_df <- samples_df %>%
  group_by(source, site) %>%
  reframe(
    {
      tmp <- seasonalMannKendallVectorized(
        datetime,
        value
      )
    },
    n_values = n(),
    mean = mean(value),
    min = min(value),
    max = max(value),
    coefficient.of.variation = sd(value) / mean(value)
  ) %>%
  mutate(
    # create column significant_slope
    significant_slope = ifelse(z <= 0.05, slope, NA_real_),
    pvalue = z
  ) %>%
  # drop unwanted columns added by seasonalMannKendall
  select(
    -z,
    -tau,
    -chi_square
  )



# print(head(sample_stats_df))
# # display sample_stats_df with pander
# pander(sample_stats_df)
save stats to csv
sample_stats_df %>%
  write.csv(here(
    "data", "exports", "seasonal-mann-kendall-stats", paste0(params$analyte, ".csv")))

Download data for this analyte’s statistics here

display with gt
library(gt)
library(scales)
library(tidyselect)  # for all_of()
library(RColorBrewer) # for brewer.pal()

# ── color_column() ─────────────────────────────────────────────────────────────
# gt_tbl   : a gt object that you’ve already created (e.g. `sample_stats_df %>% gt()`)
# df       : the original data.frame (must contain the column you want to color)
# column   : a string, e.g. "slope" or "n_values"
# palette  : a character vector of colours to feed to col_numeric()
#
color_column <- function(gt_tbl, df, column, 
                         palette = c("red", "orange", "yellow", "green", "blue", "violet"),
                         domain = NULL) {
  # 1) Pull out that column’s numeric values
  vals <- df[[column]]
  if (!is.numeric(vals)) {
    stop(sprintf("`%s` is not numeric; data_color() requires a numeric column.", column))
  }
  
  # 2) Compute its min and max (ignoring NA)
  min_val <- min(vals, na.rm = TRUE)
  max_val <- max(vals, na.rm = TRUE)
  if (is.null(domain)) {
    domain <- c(min_val, max_val)
  }
  
  # 3) Call data_color() on the gt table for that single column
  gt_tbl %>%
    data_color(
      columns = all_of(column),
      colors  = col_numeric(
        palette = palette,
        domain  = domain
      )
    )
}
library(dplyr)
library(gt)

# 1) First build your gt table as usual:
gt_tbl <- sample_stats_df %>% 
  gt()

# slope blue (-) to red (+) (0 centered)
tryCatch({
  min_slope <- min(sample_stats_df$slope,  na.rm = TRUE)
  max_slope <- max(sample_stats_df$slope,  na.rm = TRUE)
  max_abs_slope <- max(abs(min_slope), abs(max_slope))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "slope",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_slope, max_abs_slope)
  )
}, error = function(e) {
  print("Error in slope color column")
  print(e)
})

tryCatch({
  # pvalue Z
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "z", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in z color column")
  print(e)
})
[1] "Error in z color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "z", palette = (scales::brewer_pal(palette = "Blues"))(9)): `z` is not numeric; data_color() requires a numeric column.>
display with gt
# slope blue (-) to red (+) (0 centered)
tryCatch({
  min_slope <- min(sample_stats_df$significant_slope,  na.rm = TRUE)
  max_slope <- max(sample_stats_df$significant_slope,  na.rm = TRUE)
  max_abs_slope <- max(abs(min_slope), abs(max_slope))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "significant_slope",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_slope, max_abs_slope)
  )
}, error = function(e) {
  print("Error in significant_slope color column")
  print(e)
})
[1] "Error in significant_slope color column"
<rlang_error in col_numeric(palette = palette, domain = domain): Wasn't able to determine range of `domain`>
display with gt
tryCatch({
  # mean values blue to red (0 centered)
  min_mean <- min(sample_stats_df$mean,  na.rm = TRUE)
  max_mean <- max(sample_stats_df$mean,  na.rm = TRUE)
  max_abs_mean <- max(abs(min_mean), abs(max_mean))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "mean",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_mean, max_abs_mean)
  )
}, error = function(e) {
  print("Error in mean color column")
  print(e)
})

tryCatch({
  # n values white to green
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "n_values", 
    palette = scales::brewer_pal(palette = "Greens")(9)
  )
}, error = function(e) {
  print("Error in n_values color column")
  print(e)
})

tryCatch({
  # min
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "min", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in min color column")
  print(e)
})

tryCatch({
  # max
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "max", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in max color column")
  print(e)
})

tryCatch({
  # coefficient.of.variation
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "coefficient.of.variation", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in coefficient.of.variation color column")
  print(e)
})

# 4) Render/display:
gt_tbl
source site slope n_values mean min max coefficient.of.variation significant_slope pvalue
FIU_WQMP_HISTORICAL 203 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 209 0.34247582 74 NA NA NA NA NA 0.33746767
FIU_WQMP_HISTORICAL 210 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 212 NA 1 100.18294 100.18294 100.18294 NA NA NA
FIU_WQMP_HISTORICAL 214 NA 3 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 218 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 223 NA 1 96.66486 96.66486 96.66486 NA NA NA
FIU_WQMP_HISTORICAL 224 NA 2 96.01678 95.18214 96.85143 0.012293324 NA NA
FIU_WQMP_HISTORICAL 229 NA 1 87.25896 87.25896 87.25896 NA NA NA
FIU_WQMP_HISTORICAL 231 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 232 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 241 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 243i NA 1 84.10067 84.10067 84.10067 NA NA NA
FIU_WQMP_HISTORICAL 244 NA 4 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 253 NA 4 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 257 9.01799581 14 NA NA NA NA NA 1.00000000
FIU_WQMP_HISTORICAL 266 NA 3 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 282 NA 4 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 284 0.43401146 10 103.91575 76.87777 136.21289 0.175427631 NA 1.00000000
FIU_WQMP_HISTORICAL 285 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 286 NA 1 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 287 NA 4 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 288 NA 2 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 289 NA 2 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 290 NA 1 96.23735 96.23735 96.23735 NA NA NA
FIU_WQMP_HISTORICAL 293 NA 1 94.93463 94.93463 94.93463 NA NA NA
FIU_WQMP_HISTORICAL 294 NA 3 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 295 NA 1 95.18155 95.18155 95.18155 NA NA NA
FIU_WQMP_HISTORICAL 296 0.53576814 8 NA NA NA NA NA 1.00000000
FIU_WQMP_HISTORICAL 297 NA 2 107.45668 98.35624 116.55712 0.119768916 NA NA
FIU_WQMP_HISTORICAL 298 NA 1 105.33426 105.33426 105.33426 NA NA NA
FIU_WQMP_HISTORICAL 299 NA 1 59.74049 59.74049 59.74049 NA NA NA
FIU_WQMP_HISTORICAL 300 0.42435663 17 92.87637 68.51886 122.90485 0.156931019 NA 0.79843196
FIU_WQMP_HISTORICAL 301 0.55611383 66 NA NA NA NA NA 0.09171481
FIU_WQMP_HISTORICAL 307 NA 1 98.41701 98.41701 98.41701 NA NA NA
FIU_WQMP_HISTORICAL 309 NA 1 101.62781 101.62781 101.62781 NA NA NA
FIU_WQMP_HISTORICAL 310 0.49585380 78 NA NA NA NA NA 0.18675157
FIU_WQMP_HISTORICAL 311 0.48553710 64 NA NA NA NA NA 0.34212089
FIU_WQMP_HISTORICAL 312 NA 2 99.20678 97.91498 100.49858 0.018414913 NA NA
FIU_WQMP_HISTORICAL 313 -0.01502767 65 NA NA NA NA NA 0.95574133
FIU_WQMP_HISTORICAL 316 NA 2 95.78017 93.59614 97.96420 0.032247664 NA NA
FIU_WQMP_HISTORICAL 330 NA 3 97.69877 95.70935 98.91301 0.017777273 NA NA
FIU_WQMP_HISTORICAL 337 NA 4 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 500 NA 2 95.72692 95.41264 96.04120 0.004642967 NA NA
FIU_WQMP_HISTORICAL 501 0.25587242 12 NA NA NA NA NA 0.47950012
FIU_WQMP_HISTORICAL 502 -0.17008752 8 97.13241 93.63745 99.28568 0.020024825 NA 1.00000000
FIU_WQMP_HISTORICAL 503 -0.37170328 14 NA NA NA NA NA 1.00000000
FIU_WQMP_HISTORICAL 504 NA 2 97.16719 94.96974 99.36463 0.031982570 NA NA
FIU_WQMP_HISTORICAL 505 NA 2 NA NA NA NA NA NA
FIU_WQMP_HISTORICAL 507 NA 1 93.40655 93.40655 93.40655 NA NA NA
FIU_WQMP_HISTORICAL 508 2.72880742 13 89.08614 44.66796 107.23978 0.194585977 NA 0.21129955
FIU_WQMP_HISTORICAL 509 -0.08058467 15 95.42682 91.47474 98.55585 0.026962948 NA 0.59731157